# Build the feature matrix (qualitative + quantitative columns) and the target.
colonnes_explicatives = qualitatives + quantitatives
X = donnees[colonnes_explicatives]
y = donnees[cible]

# Hold out 20% of the rows as a test set. The split is stratified on the
# target and uses a fixed seed so it is reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)

# Record on the source frame which partition each row landed in
# ('apprentissage' is the training partition), matching rows by index label.
for etiquette, partition in (('apprentissage', X_train), ('test', X_test)):
    donnees.loc[partition.index, 'échantillon'] = etiquette

# Persist the labelled dataset so later steps can reuse the exact same split.
chemin_sortie = os.path.join(repertoireDonnees, 'donnees_echantillons.parquet')
donnees.to_parquet(chemin_sortie)